
** Skeleton panel: one row per individual (pnr) and month index.
** The month index runs 1-96, i.e. 12 months for each year 2008-2015.
use "$data\STIL_aftaler2012_2015", clear
keep pnr // every individual that appears in the apprenticeship agreements
duplicates drop // only pnr is left, so this keeps one row per individual

** Create wide placeholder variables t1 ... t96 so that reshape can turn
** them into one row per (pnr, month); the placeholders carry no data.
local first_year = 2008
local last_year  = 2015
local n_months   = (`last_year' - `first_year' + 1) * 12
forvalues m = 1/`n_months' {
	generate t`m' = .
}

reshape long t, i(pnr) j(month)
drop t // placeholder only; the panel keeps just pnr and month
save "$data\STIL_pnr_panel_2012_2015", replace

** Apprenticeship spells: one row per individual (pnr) and spell, holding the
** spell's start and end month. Raw input: agreements from the ministry of education.
use "$data\STIL_aftaler2012_2015", clear

** mofd() counts months since January 1960 (January 2008 = 576), so
** subtracting 575 puts the dates on a month index where January 2008 = 1.
generate app_start_month = mofd(startdato) - 575
generate app_end_month   = mofd(slutdato) - 575

** Agreements that start in the same month (e.g. an individual starts one and
** switches a few days later) look like competing spells at monthly resolution:
** keep one row per start month with the latest end month.
collapse (max) app_end_month, by(pnr app_start_month)
sort pnr app_start_month

** Merge overlapping spells: when the next spell of the same individual starts
** before this one has ended and ends later, extend this spell to the later end.
** One pass only moves an end month one row up, so chains of overlapping spells
** need several passes; repeat until no row qualifies instead of a fixed number
** of passes. Every pass strictly raises at least one end month, so it terminates.
local overlaps pnr == pnr[_n+1] & app_end_month >= app_start_month[_n+1] & app_end_month < app_end_month[_n+1]
count if `overlaps'
local n_left = r(N)
while `n_left' > 0 {
	replace app_end_month = app_end_month[_n+1] if `overlaps'
	count if `overlaps'
	local n_left = r(N)
}

** Drop spells that lie entirely within the preceding spell of the same individual.
** Each pass compares a row only with the row directly above it, so several
** enclosed spells in a row need several passes; repeat until none is left.
sort pnr app_start_month
local enclosed pnr == pnr[_n-1] & app_start_month >= app_start_month[_n-1] & app_end_month <= app_end_month[_n-1]
count if `enclosed'
local n_left = r(N)
while `n_left' > 0 {
	drop if `enclosed'
	count if `enclosed'
	local n_left = r(N)
}

generate month = app_start_month // merge key for STIL_pnr_panel_2012_2015
save "$data\STIL_pnr_panel_startmonth", replace // pnr, month, and the start and end month of each spell


** Monthly apprentice indicator: one row per individual (pnr) and month with
** apprentice = 1 in months covered by a spell and missing otherwise.
use "$data\STIL_pnr_panel_2012_2015", clear
merge 1:1 pnr month using "$data\STIL_pnr_panel_startmonth", nogen keep(match master)

** After the merge, app_start_month and app_end_month are filled only in the
** month a spell starts. Carry them forward to the following months.
** The carry-forward depends on row order, so months are sorted explicitly
** within pnr rather than relying on the order left behind by merge.
** NOTE(review): a spell with a missing app_end_month would presumably inherit
** the previous spell's end month here - confirm end dates are always recorded.
bysort pnr (month): carryforward app_start_month, gen(spell_start)
bysort pnr (month): carryforward app_end_month, gen(spell_end)

** Apprentice in every month from the start of a spell up to and including its
** end month. A missing spell_end compares as larger than any month.
generate apprentice = 1 if spell_start != . & month <= spell_end
drop spell_start spell_end app_start_month app_end_month

** data includes ID, month, and apprentice dummy (1 or missing)
save "$data\minwage_data_app_panel_dummy.dta", replace

********************************************************************************
** MAIN ESTIMATION DATA
** Attach the monthly apprentice dummy to the main person-month data.
** Only rows of the main data are kept; rows without a match in the
** apprentice panel get apprentice = 0.
********************************************************************************
use "$data\minwage_data4_2012_2015.dta", clear
merge 1:1 pnr month using "$data\minwage_data_app_panel_dummy.dta", keep(master match) nogenerate
replace apprentice = 0 if apprentice == . // no apprenticeship recorded for this month
save "$data\minwage_data5_estimation_data_2012_2015.dta", replace
********************************************************************************
